
# Load Data
setwd("C:/Users/eas24f/OneDrive - Florida State University/Research/Inertia RESTAT/Data") # Office computer directory
load(("Final_Workspace_v3.RData"))
sipp <- total
#sipp <- read.csv("bothwaves.csv",header=TRUE)
#status2013 <- read.csv("status2013.csv")
#status2014 <- read.csv("status2014.csv")

# Create Household Object

	# Reduce Household IDS
	old_ids <- unique(sipp$SSUID)
	new_ids <- 1:length(sipp$SSUID)
	names(new_ids) <- old_ids
	sipp$household_id <- new_ids[as.character(sipp$SSUID)]

	sipp$year <- 2013
	sipp[sipp$SWAVE == 2,"year"] <- 2014
	sipp[sipp$SWAVE == 3,"year"] <- 2015
	
	sipp$household_year <- paste(sipp$household_id,sipp$year,sep="_")
	sipp$FPL <- pmax(0,sipp$THTOTINC/sipp$RHPOV)
	
	#household_sizes <- by(sipp$WPFINWGT,sipp$household_year,length)
	#sipp_households$household_size <- NA
	#sipp_households[names(household_sizes),"household_size"] <- household_sizes
	
	
	household_names <- unique(sipp$household_year)
	
	match_fields <- c("TMOVER",
		"STARTNONGROUP","ENDNONGROUP",
		"FPL",
		"percentyoung","percentmiddle","percentold",
		"percentmale",
		"percentasian","percentblack","percenthispanic","percentother")
	
	# Employer offer
		# EEMPNOESI: Employed, employer offers coverage to employees, but not on employer's plan
		# Reason did not buy ESI
			# EYNOESI_EXP: too expensive (has offer)
			# EYNOESI_NEW: not at job long enough to qualify
			# EYNOESI_HRS: part-time/temporary employee
			# EYNOESI_ELG: not eligible for another reason
			# EYNOESI_HTH: healthy and don't need it (has offer)
			# EYNOESI_ELS: able to get care elsewhere, such as at a clinic (has offer)
			# EYNOESI_COV: covered under someone else's plan (has offer)
			# EYNOESI_UNH: dissatisfied or don't believe in insurance (has offer)
			# EYNOESI_MIS: missed enrollment window
			# EYNOESI_OTH: other reason
	
	sipp$employer_offer <- 0
	sipp[(sipp$EYNOESI_COV == 1 & !is.na(sipp$EYNOESI_COV)) | 
			(sipp$EYNOESI_EXP == 1 & !is.na(sipp$EYNOESI_EXP)) | 
			(sipp$EYNOESI_HTH == 1 & !is.na(sipp$EYNOESI_HTH)) | 
			(sipp$EYNOESI_ELS == 1 & !is.na(sipp$EYNOESI_ELS)) | 
			(sipp$EYNOESI_UNH == 1 & !is.na(sipp$EYNOESI_UNH)),"employer_offer"] <- 1
	
	# Drop households that dropped after waves
	
		dropped_2013 <- unique(setdiff(sipp[sipp$year == 2013,"household_id"],sipp[sipp$year == 2014,"household_id"]))
		dropped_2014 <- unique(setdiff(sipp[sipp$year == 2014,"household_id"],sipp[sipp$year == 2015,"household_id"]))
	
	# Drop households that were never on nongroup
	
		sipp_households <- sipp[!duplicated(sipp$household_year),c("SSUID","household_id","year","household_year",
			"jan2013","jan2014","jan2015","dec2013","dec2014","dec2015",match_fields)]
		sipp_households <- sipp_households[!sipp_households$household_id %in% unique(c(dropped_2013,dropped_2014)),]
			
		households_on_nongroup <- unique(sipp[!is.na(sipp$STARTNONGROUP),"household_id"])
		sipp_households <- sipp_households[sipp_households$household_id %in% households_on_nongroup,]
		rownames(sipp_households) <- sipp_households$household_year
	
	# Add Household weight
	household_weights <- by(sipp[sipp$household_year %in% rownames(sipp_households),"WPFINWGT"],
		sipp[sipp$household_year %in% rownames(sipp_households),"household_year"],sum)
	sipp_households$weight <- NA
	sipp_households[names(household_weights),"weight"] <- household_weights
	
	# Add Household size
	household_sizes <- by(sipp[sipp$household_year %in% rownames(sipp_households),"WPFINWGT"],
		sipp[sipp$household_year %in% rownames(sipp_households),"household_year"],length)/12
	sipp_households$household_size <- NA
	sipp_households[names(household_sizes),"household_size"] <- household_sizes
	
	# Add employer offers
	employer_offers <- by(sipp[sipp$household_year %in% rownames(sipp_households),"employer_offer"],
		sipp[sipp$household_year %in% rownames(sipp_households),"household_year"],sum)
	sipp_households$employer_offer <- NA
	sipp_households[names(employer_offers),"employer_offer"] <- pmin(employer_offers,1)
	
	# Add max age
	max_ages <- by(sipp[sipp$household_year %in% rownames(sipp_households),"TAGE"],
		sipp[sipp$household_year %in% rownames(sipp_households),"household_year"],max)
	sipp_households$max_age <- NA
	sipp_households[names(max_ages),"max_age"] <- max_ages
	
	# Create household age variables
	number_0to17 <- by(sipp[sipp$household_year %in% rownames(sipp_households) & sipp$TAGE < 18,"TAGE"],
		sipp[sipp$household_year %in% rownames(sipp_households) & sipp$TAGE < 18,"household_year"],length)/12
	number_18to34 <- by(sipp[sipp$household_year %in% rownames(sipp_households) & sipp$TAGE >= 18 & sipp$TAGE < 35,"TAGE"],
		sipp[sipp$household_year %in% rownames(sipp_households) & sipp$TAGE >= 18 & sipp$TAGE < 35,"household_year"],length)/12
	number_35to54 <- by(sipp[sipp$household_year %in% rownames(sipp_households) & sipp$TAGE >= 35 & sipp$TAGE < 55,"TAGE"],
		sipp[sipp$household_year %in% rownames(sipp_households) & sipp$TAGE >= 35 & sipp$TAGE < 55,"household_year"],length)/12
	
	sipp_households[,c("perc_0to17","perc_18to34","perc_35to54")] <- 0
	sipp_households[names(number_0to17),"perc_0to17"] <- number_0to17/sipp_households[names(number_0to17),"household_size"]
	sipp_households[names(number_18to34),"perc_18to34"] <- number_18to34/sipp_households[names(number_18to34),"household_size"]
	sipp_households[names(number_35to54),"perc_35to54"] <- number_35to54/sipp_households[names(number_35to54),"household_size"]
	
	# Determine which households exited or entered
	
		sipp_households$entered <- 0
		sipp_households$exited <- 0
		
		nongroup_participants2013 <- sipp_households[sipp_households$year == 2013 & !is.na(sipp_households$STARTNONGROUP),"household_id"]
		nongroup_participants2014 <- sipp_households[sipp_households$year == 2014 & !is.na(sipp_households$STARTNONGROUP),"household_id"]
		nongroup_participants2015 <- sipp_households[sipp_households$year == 2015 & !is.na(sipp_households$STARTNONGROUP),"household_id"]
		
		entrants <- sipp_households[sipp_households$year == 2013 & sipp_households$household_id %in% nongroup_participants2014 & is.na(sipp_households$STARTNONGROUP),"household_id"]
		sipp_households[paste(entrants,"2014",sep="_"),"entered"] <- 1
		
		entrants <- sipp_households[sipp_households$year == 2014 & sipp_households$household_id %in% nongroup_participants2015 & is.na(sipp_households$STARTNONGROUP),"household_id"]
		sipp_households[paste(entrants,"2015",sep="_"),"entered"] <- 1
		
		
		#entrants <- sipp_households[sipp_households$year == 2013 & sipp_households$household_id %in% nongroup_participants2013 & sipp_households$STARTNONGROUP > 1,"household_id"]
		#sipp_households[paste(entrants,"2013",sep="_"),"entered"] <- 1
		
		#entrants <- sipp_households[sipp_households$year == 2014 & sipp_households$household_id %in% nongroup_participants2014 & sipp_households$STARTNONGROUP > 1,"household_id"]
		#sipp_households[paste(entrants,"2014",sep="_"),"entered"] <- 1
		
		departures <- sipp_households[sipp_households$year == 2014 & sipp_households$household_id %in% nongroup_participants2013 & is.na(sipp_households$STARTNONGROUP),"household_id"]
		sipp_households[paste(departures,"2013",sep="_"),"exited"] <- 1
		
		departures <- sipp_households[sipp_households$year == 2013 & sipp_households$household_id %in% nongroup_participants2013 & sipp_households$ENDNONGROUP < 12,"household_id"]
		sipp_households[paste(departures,"2013",sep="_"),"exited"] <- 1
		
		departures <- sipp_households[sipp_households$year == 2015 & sipp_households$household_id %in% nongroup_participants2014 & is.na(sipp_households$STARTNONGROUP),"household_id"]
		sipp_households[paste(departures,"2014",sep="_"),"exited"] <- 1
		
		departures <- sipp_households[sipp_households$year == 2014 & sipp_households$household_id %in% nongroup_participants2014 & sipp_households$ENDNONGROUP < 12,"household_id"]
		sipp_households[paste(departures,"2014",sep="_"),"exited"] <- 1
		
		#departures <- sipp_households[sipp_households$year == 2014 & sipp_households$household_id %in% nongroup_participants2014 & sipp_households$ENDNONGROUP < 12,"household_id"]
		#sipp_households[paste(departures,"2014",sep="_"),"exited"] <- 1
		
	
	
	# New Insurance Plan (Go with household head's plan)
	
	sipp_households$new_plan_after_exit <- 1
	sipp_households$old_plan_before_enter <- 1
	
		exiting_households <- sipp_households[sipp_households$exited == 1 & sipp_households$year == 2013,"household_id"]
		entering_households <- sipp_households[sipp_households$entered == 1  & sipp_households$year == 2014,"household_id"]
		
		sipp_households[paste(exiting_households,"2013",sep="_"),"new_plan_after_exit"] <- 
			sipp_households[paste(exiting_households,"2014",sep="_"),"jan2014"]
		
		sipp_households[paste(entering_households,"2014",sep="_"),"old_plan_before_enter"] <- 
			sipp_households[paste(entering_households,"2013",sep="_"),"jan2013"]
	
		exiting_households <- sipp_households[sipp_households$exited == 1 & sipp_households$year == 2014,"household_id"]
		entering_households <- sipp_households[sipp_households$entered == 1  & sipp_households$year == 2015,"household_id"]
		
		sipp_households[paste(exiting_households,"2014",sep="_"),"new_plan_after_exit"] <- 
			sipp_households[paste(exiting_households,"2015",sep="_"),"jan2015"]
		
		sipp_households[paste(entering_households,"2015",sep="_"),"old_plan_before_enter"] <- 
			sipp_households[paste(entering_households,"2014",sep="_"),"jan2014"]
	
	
	# Create dependent variables
		# Exited market
		# Entered market
	
		# Moved (tmover 4 and above)
		# Got ESI Offer
		# Went on Medicaid
		# Aged out
		
		# 1 if nongroup, 2 if military, 3 if medicare, 4 if ESI/group, 5 if medicaid/CHIP, 6 if 'other', 7 if uninsured
	
	
	sipp_households$entered_market <- NA
	sipp_households[sipp_households$old_plan_before_enter %in% c(7),"entered_market"] <- 0
	sipp_households[sipp_households$old_plan_before_enter %in% c(4,5),"entered_market"] <- 1
	sipp_households[sipp_households$employer_offer == 1 & sipp_households$entered_market == 0 & !is.na(sipp_households$entered_market),"entered_market"] <- 1
	sipp_households[sipp_households$TMOVER >= 4 & !is.na(sipp_households$TMOVER) &
		sipp_households$entered_market == 0 & !is.na(sipp_households$entered_market),"entered_market"] <- 1
	
	
	sipp_households$exited_market <- NA
	sipp_households[sipp_households$new_plan_after_exit %in% c(7),"exited_market"] <- 0
	sipp_households[sipp_households$new_plan_after_exit %in% c(4,5),"exited_market"] <- 1
	sipp_households[sipp_households$employer_offer == 1 & sipp_households$exited_market == 0 & !is.na(sipp_households$exited_market),"exited_market"] <- 1
	sipp_households[sipp_households$TMOVER >= 4 & !is.na(sipp_households$TMOVER) &
		sipp_households$exited_market == 0 & !is.na(sipp_households$exited_market),"exited_market"] <- 1
		
	drop_2013 <- FALSE
	if(drop_2013) {
		sipp_households[sipp_households$year == 2014,"entered_market"] <- NA
		sipp_households[sipp_households$year == 2013,"exited_market"] <- NA
	}
	
	
	
# Estimate Logit

	sipp_households$start_date <- sipp_households$STARTNONGROUP
	sipp_households$end_date <- sipp_households$ENDNONGROUP	
	sipp_households$perc_male <- sipp_households$percentmale	
	sipp_households$perc_asian <- sipp_households$percentasian
	sipp_households$perc_black <- sipp_households$percentblack
	sipp_households$perc_hispanic <- sipp_households$percenthispanic
	sipp_households$perc_other <- sipp_households$percentother	
		
		
	sipp_households$entered_late <- 0
	sipp_households[sipp_households$start_date > 5 & !is.na(sipp_households$start_date),"entered_late"] <- 1
	
	sipp_households$exited_early <- 0
	sipp_households[sipp_households$end_date < 12 & !is.na(sipp_households$end_date),"exited_early"] <- 1
	
	sipp_households$family <- as.numeric(sipp_households$household_size > 1)
	
	sipp_households[,c("STARTNONGROUP","ENDNONGROUP","percentyoung","percentmiddle","percentold","percentmale",
		"percentasian","percentblack","percenthispanic","percentother")] <- NULL	
		
	sipp_households$FPL_bracket <- "138orless"
	sipp_households[sipp_households$FPL > 1.38 & sipp_households$FPL <= 2.50,"FPL_bracket"] <- "138to250"
	sipp_households[sipp_households$FPL > 2.50 & sipp_households$FPL <= 4,"FPL_bracket"] <- "250to400"
	sipp_households[sipp_households$FPL > 4,"FPL_bracket"] <- "400ormore"
		
	sipp_households$transitioned <- pmax(sipp_households$entered_market,sipp_households$exited_market,na.rm=TRUE)	
	sipp_households$midyear_transition <- pmax(sipp_households$exited_early,sipp_households$entered_late,na.rm=TRUE)	
		
	# Entrants
	spec <- entered_market ~ entered_late + FPL + household_size + perc_0to17 + perc_18to34 + perc_35to54 +
		perc_male + perc_asian + perc_black + perc_hispanic + perc_other
	spec <- entered_market ~ entered_late + FPL_bracket + household_size + perc_0to17 + perc_18to34 + perc_35to54 +
		perc_male + perc_asian + perc_black + perc_hispanic + perc_other
	#sipp_logit <- glm(spec, data = sipp_households[!is.na(sipp_households$entered_market),], 
	#	family = "binomial", weights = sipp_households[!is.na(sipp_households$entered_market),"weight"])
	sipp_logit <- glm(spec, data = sipp_households[!is.na(sipp_households$entered_market),], family = "binomial")
	
	summary(sipp_logit)
	save(sipp_logit,file="sipp_logit_enter")
	
	# Departures
	spec <- exited_market ~ exited_early + FPL_bracket + household_size + perc_0to17 + perc_18to34 + perc_35to54 +
		perc_male + perc_asian + perc_black + perc_hispanic + perc_other
	sipp_logit <- glm(spec, data = sipp_households[!is.na(sipp_households$exited_market),], family = "binomial")
	summary(sipp_logit)
	save(sipp_logit,file="sipp_logit_exit")
	
	# Combined
	spec <- transitioned ~ as.factor(start_date) + as.factor(end_date) + FPL_bracket + household_size + perc_0to17 + perc_18to34 + perc_35to54 +
		perc_male + perc_asian + perc_black + perc_hispanic + perc_other
	spec <- transitioned ~ FPL_bracket + household_size + perc_0to17 + perc_18to34 + perc_35to54 + 
		perc_male + perc_asian + perc_black + perc_hispanic + perc_other + midyear_transition
	spec <- transitioned ~ FPL_bracket + household_size + perc_0to17 + perc_18to34 + perc_35to54 +
		perc_male + perc_asian + perc_black + perc_hispanic + perc_other
	
	sipp_lpm <- lm(spec, data = sipp_households[!is.na(sipp_households$exited_market) | !is.na(sipp_households$entered_market),])
	summary(sipp_lpm)
	
	sipp_logit <- glm(spec, data = sipp_households[!is.na(sipp_households$exited_market) | !is.na(sipp_households$entered_market),], family = "binomial")
	summary(sipp_logit)
	save(sipp_logit,file="sipp_logit")
	
	#sipp_ols <- lm(spec, data = sipp_households[!is.na(sipp_households$exited_market) | !is.na(sipp_households$entered_market),])
	#summary(sipp_ols)
	
	set.seed(2)
	predictions <- as.numeric(predict(sipp_logit,type="response",data=sipp_households[!is.na(sipp_households$exited_market) | !is.na(sipp_households$entered_market),]) > 
		runif(nrow(sipp_households[!is.na(sipp_households$exited_market) | !is.na(sipp_households$entered_market),])))
	
	table(predictions == 1 & sipp_households[!is.na(sipp_households$exited_market) | !is.na(sipp_households$entered_market),"transitioned"] == 1)
	table(predictions == 0 & sipp_households[!is.na(sipp_households$exited_market) | !is.na(sipp_households$entered_market),"transitioned"] == 1)
	table(predictions == 1 & sipp_households[!is.na(sipp_households$exited_market) | !is.na(sipp_households$entered_market),"transitioned"] == 0)
	table(predictions == 0 & sipp_households[!is.na(sipp_households$exited_market) | !is.na(sipp_households$entered_market),"transitioned"] == 0)

	probabilities <- predict(sipp_logit,type="response",data=sipp_households[!is.na(sipp_households$exited_market) | !is.na(sipp_households$entered_market),])
	predicted.classes <- ifelse(probabilities > 0.5, 1,0)
	mean(predicted.classes == sipp_households[!is.na(sipp_households$exited_market) | !is.na(sipp_households$entered_market),"transitioned"])
	
	library(stargazer)
	stargazer(sipp_logit)
	